# Load the cleaned sidewalk cafe licence/application table. The path is
# resolved by here::here() rather than the current working directory.
cafe <-
  here::here("data/Sidewalk_Caf__Licenses_and_Applications_clean.csv") %>%
  read_csv()
## Parsed with column specification:
## cols(
## .default = col_character(),
## zip = col_double(),
## swc_sq_ft = col_double(),
## swc_tables = col_double(),
## swc_chairs = col_double(),
## lat = col_double(),
## long = col_double(),
## community_district = col_double(),
## city_council_district = col_double(),
## app_sq_ft = col_double(),
## app_tables = col_double(),
## app_chairs = col_double(),
## app_status_date = col_datetime(format = ""),
## expiration_date = col_datetime(format = ""),
## app_too_date = col_datetime(format = ""),
## submit_date = col_datetime(format = ""),
## intake_dd = col_datetime(format = ""),
## send_package_dd = col_datetime(format = ""),
## cp_dd = col_datetime(format = ""),
## cb_dd = col_datetime(format = ""),
## hearing_dd = col_datetime(format = "")
## # ... with 4 more columns
## )
## See spec(...) for full column specifications.
# Load the fiscal-year-2021 parking violation records and normalise the
# column names with janitor::clean_names().
#
# `col_types` is readr's compact spec, one letter per CSV column
# (c = character, d = double, l = logical, n = number).
#
# Positions 27-28 use `n` instead of `d`: the parse warnings recorded for
# this read show "Date First Observed" and "Law Section" failing with
# leftovers such as ",200,626" and ",111", i.e. values written with digit
# grouping marks that col_double() rejects but col_number() accepts.
# NOTE(review): presumably those two columns sit at positions 27-28 --
# confirm against the CSV header.
parking <-
  read_csv(
    here::here("data/Parking_Violations_Issued_-_Fiscal_Year_2021.csv"),
    col_types = "dccccdcccddddcdddcccccccccnnclccccddcdcclll"
  ) %>%
  janitor::clean_names()
## Warning: 1606926 parsing failures.
## row col expected actual file
## 188 Date First Observed no trailing characters ,200,626 'C:/Users/YULIU/Desktop/study/data science/final_project/hot_coffee/data/Parking_Violations_Issued_-_Fiscal_Year_2021.csv'
## 274 Date First Observed no trailing characters ,200,619 'C:/Users/YULIU/Desktop/study/data science/final_project/hot_coffee/data/Parking_Violations_Issued_-_Fiscal_Year_2021.csv'
## 458 Date First Observed no trailing characters ,200,605 'C:/Users/YULIU/Desktop/study/data science/final_project/hot_coffee/data/Parking_Violations_Issued_-_Fiscal_Year_2021.csv'
## 468 Date First Observed no trailing characters ,200,702 'C:/Users/YULIU/Desktop/study/data science/final_project/hot_coffee/data/Parking_Violations_Issued_-_Fiscal_Year_2021.csv'
## 590 Law Section no trailing characters ,111 'C:/Users/YULIU/Desktop/study/data science/final_project/hot_coffee/data/Parking_Violations_Issued_-_Fiscal_Year_2021.csv'
## ... ................... ...................... ........ ..........................................................................................................................
## See problems(...) for more details.
# Count tickets per street and attach that count to every cafe record.
# right_join() keeps all cafe rows, including those on streets with no
# recorded ticket.
plot_cafe_map <-
  parking %>%
  count(street_name, name = "ticket") %>%
  # count() returns integers; cast to double so the fractional placeholder
  # used below is a valid replacement value.
  mutate(ticket = as.numeric(ticket)) %>%
  # Name the key explicitly instead of relying on the implicit natural join.
  right_join(cafe, by = "street_name") %>%
  # Streets without tickets get a tiny positive value rather than NA, which
  # keeps log(ticket) finite when the count is later log-transformed.
  mutate(ticket = replace_na(ticket, 1e-10))
## Joining, by = "street_name"
# Continuous colour scale over the log of the ticket counts, using the
# "plasma" palette (other candidates: "viridis", "magma", "inferno").
pal <- colorNumeric(
  palette = "plasma",
  domain = log(plot_cafe_map$ticket)
)
# Build the interactive map: one circle marker per cafe record, coloured by
# log ticket count, with an HTML popup showing the business name and the
# rounded ticket count. The result replaces the data frame of the same name.
plot_cafe_map <-
  leaflet(
    data = mutate(
      plot_cafe_map,
      pop = str_c("<b>", business_name, "</b><br>", round(ticket), " tickets")
    )
  ) %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addCircleMarkers(
    lng = ~long,
    lat = ~lat,
    color = ~pal(log(ticket)),
    radius = .1,
    popup = ~pop
  )

# Print the widget so it is rendered in the output.
plot_cafe_map
Convert the issue date and violation time into numeric month and hour columns
# Derive numeric month and 24-hour clock columns for each violation.
#
# `issue_date` is split on "/" into month/day/year. `violation_time` is split
# by position into a two-character hour, a two-character minute and the
# remaining AM/PM marker. Rows whose marker is not "A" or "P" are dropped.
parking_time <-
  parking %>%
  select(issue_date, violation_time, summons_number, vehicle_make) %>%
  separate(issue_date, into = c("month", "day", "year"), sep = "/") %>%
  separate(
    violation_time,
    into = c("hour", "min", "am_pm"),
    sep = c(2, 4)
  ) %>%
  filter(am_pm %in% c("P", "A")) %>%
  mutate(
    # Turn the marker into the offset to add for a 24-hour clock.
    am_pm = recode(am_pm, `P` = 12, `A` = 0),
    # `%% 12` folds the 12 o'clock hour to 0 before the offset is added, so
    # 12:xx AM becomes 0 and 12:xx PM becomes 12 (a plain `hour + am_pm`
    # would give 12 and 24 respectively).
    hour = as.numeric(hour) %% 12 + am_pm,
    month = as.numeric(month)
  ) %>%
  select(-year)
Make line plots: violation vs month
# Line chart of the number of violations in each month.
parking_time %>%
  # One row per month with the row count in `n`; count() returns an
  # ungrouped result.
  count(month) %>%
  # "lines" is the valid scatter mode flag ("line" is not a recognised value).
  plot_ly(x = ~month, y = ~n, type = "scatter", mode = "lines") %>%
  layout(
    title = "Violations per Month",
    xaxis = list(
      # Treat months as discrete labels rather than a continuous axis.
      type = "category",
      title = "Month"
    ),
    yaxis = list(
      title = "Count of violations"
    )
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
Line plot: violations vs. hour of day
# Line chart of the number of violations in each hour of the day.
parking_time %>%
  # One row per hour with the row count in `n`; count() returns an
  # ungrouped result.
  count(hour) %>%
  # "lines" is the valid scatter mode flag ("line" is not a recognised value).
  plot_ly(x = ~hour, y = ~n, type = "scatter", mode = "lines") %>%
  layout(
    title = "Violations per Hour",
    xaxis = list(
      # Treat hours as discrete labels rather than a continuous axis.
      type = "category",
      title = "Hour",
      range = c(0, 24)
    ),
    yaxis = list(
      title = "Count of violations"
    )
  )
## `summarise()` ungrouping output (override with `.groups` argument)